#####################################
### Extension CODE BY cHENG CHENG ###
#####################################

# These correponds the plots in extension sections and first 2 sections in the appendix

### BEFORE RUNNING THE CODE ###
# The code contains material in Chinese, as the default setting for R will result in corrupted Chinese charaters, first change settings
# "Tools" -> "Global Options" -> "Code" -> "Saving" -> Change "default text encoding" into "UTF-8" (Thanks Max!)
# You should have correct chinese charaters now!


# Let's get started!

#Set work directory
#Chane into your work directory
setwd("C:/harvard/spring_2019/GOV 2001/cheng's project/using dan's data")


# clean work space 
.rs.restartR()
rm(list=ls())

#Load libraries
library(rio)
library(dplyr)
library(readstata13)
library(stargazer)
#install.packages("sem")
library(sem)
library(ggplot2)
library(Matching)
library(ebal)
library(xtable)
library(MatchIt)
library(sandwich)
library(mvtnorm)
#install.packages('matrixStats') 
library(matrixStats)
library(multiwayvcov)
library(reshape2)
# an additional github for matchfrontier (for resolving mysterious errors due to messy coding of match frontier lol)
install_github("ChristopherLucas/MatchingFrontier", ref = "9f050a39fb619608601521ae8e9bef84fc6422f0")
#install.packages("MatchingFrontier") 
library(MatchingFrontier)


# Aggregated data set
load("village_data_replication.Rdata")
repdata <- village
rm(village) 
load("village_v11.Rdata")
# some variables are not included in the village dataset
village.both <- cbind(repdata[,1&11], village)

# The survery experiment data set, named "z"
load("endorsement_experiment_replication.Rdata")

# The original CGSS 2005 data set
CGSS2005 <- read.dta13("cgss2005_14.dta") 
CGSS2005_rural=CGSS2005[which(CGSS2005$qa5_01=="农业户口"),]



#########################
### DESCRIPTIVE TABLE ###
#########################

attach(CGSS2005_rural)

# REASON FOR LAND EXPROPRIATION 
sum(qg02b1=="yes",na.rm=TRUE) # 102
sum(qg02b2=="yes",na.rm=TRUE) # 91
sum(qg02b3=="yes",na.rm=TRUE) # 923
sum(qg02b4=="yes",na.rm=TRUE) # 1080


detach(CGSS2005_rural)

#########################
### REGRESSION TABLES ###
#########################

attach(village.both)

# ENTERPRISE MODEL [HYPO 1] #
lm17 <- lm(seize~embedded+clan.any.mean+I(qa08a+qa08b))
lm18 <- lm(seize~embedded+clan.any.mean+county.distance+terrain+soil.quality+province+(log(income.mean))+I(qa08a+qa08b))
lm19 <- lm(seize~embedded+clan.any.mean+county.distance+terrain+soil.quality+election.township.mean+township.distance+province+(log(income.mean))+I(qa08a+qa08b))
lm20 <- lm(seize~embedded+clan.any.mean+county.distance+terrain+soil.quality+election.township.mean+township.distance+surname.index+ethnic.index+log.hh+province+(log(income.mean))+I(qa08a+qa08b))
stargazer(lm17,lm18,lm19,lm20)


# INTERACTION MODEL [HYPO 2] #
lm45 <- lm(seize~embedded+clan.any.mean+I(qa08a+qa08b)+I((qa08a+qa08b)*embedded), data=village.both)
lm46 <- lm(seize~embedded+clan.any.mean+county.distance+terrain+soil.quality+province+(log(income.mean))+I(qa08a+qa08b)++I((qa08a+qa08b)*embedded), data=village.both)
lm47 <- lm(seize~embedded+clan.any.mean+county.distance+terrain+soil.quality+election.township.mean+township.distance+province+(log(income.mean))+I(qa08a+qa08b)++I((qa08a+qa08b)*embedded), data=village.both)
lm48 <- lm(seize~embedded+clan.any.mean+county.distance+terrain+soil.quality+election.township.mean+township.distance+surname.index+ethnic.index+log.hh+province+(log(income.mean))+I(qa08a+qa08b)++I((qa08a+qa08b)*embedded), data=village.both)
stargazer(lm45,lm46,lm47,lm48)

# TIME INVESTMENT MODEL [HYPO 3] #
lm21 <- lm(qc07e ~ embedded+clan.any.mean+I(qa08a+qa08b),data=village)
lm22 <- lm(qc07e ~ embedded+clan.any.mean+county.distance+terrain+soil.quality+province+log.income.median+I(qa08a+qa08b), data=village)
lm23 <- lm(qc07e ~ embedded+clan.any.mean+county.distance+terrain+soil.quality+election.township.mean+township.distance+province+log.income.median+I(qa08a+qa08b), data=village)
lm24 <- lm(qc07e ~ embedded+clan.any.mean+county.distance+terrain+soil.quality+election.township.mean+township.distance+surname.index+ethnic.index+log.hh+province+log.income.median+I(qa08a+qa08b), data=village)
stargazer(lm21,lm22,lm23,lm24) 

detach(village.both)

#############
### PLOTS ###
#############

### EXTENSION PLOT FROM SURVEY EXPERIMENT ###

# for those who have a lineage cadre
z1 <- z[which(z$lineage_member_influential_cadre=="是"),]
villager <- t.test(z1$endorse_itt[which(z1$treat=="村民")])
lineage <- t.test(z1$endorse_itt[which(z1$treat=="家族长老")])
cadre <- t.test(z1$endorse_itt[which(z1$treat=="村干部")])

villager_90 <- t.test(z1$endorse_itt[which(z1$treat=="村民")],conf.level = 0.90) 
lineage_90 <- t.test(z1$endorse_itt[which(z1$treat=="家族长老")],conf.level = 0.90)
cadre_90 <- t.test(z1$endorse_itt[which(z1$treat=="村干部")],conf.level = 0.90)

figure1 <- as.data.frame(matrix(rep(NA,18),nrow=3))
colnames(figure1)<- c("category","est","CI_up95","CI_lw95","CI_up90","CI_lw90")
row.names(figure1)<- c("Villager","LIneage Leader","Village Official")
endorsement.tr <- c("Villager","LIneage Leader","Village Official")
figure1 <- cbind(endorsement.tr,figure1)
figure1$endorsement.tr <- as.factor(figure1$endorsement.tr)

figure1[,1]<-as.vector(1:3)
figure1[,2]<-as.vector(c(villager$estimate,lineage$estimate,cadre$estimate))
figure1[,3]<-as.vector(c(villager$conf.int[2],lineage$conf.int[2],cadre$conf.int[2]))
figure1[,4]<-as.vector(c(villager$conf.int[1],lineage$conf.int[1],cadre$conf.int[1]))
figure1[,5]<-as.vector(c(villager_90$conf.int[2],lineage_90$conf.int[2],cadre_90$conf.int[2]))
figure1[,6]<-as.vector(c(villager_90$conf.int[1],lineage_90$conf.int[1],cadre_90$conf.int[1]))

theme_bw1 <- function(base_size = 13, base_family = "") {
  theme_grey(base_size = base_size, base_family = base_family) %+replace%
    theme(
      axis.text.x =       element_text(size = base_size, colour = "black",  hjust = .5 , vjust=1),
      axis.text.y =       element_text(size = base_size , colour = "black", hjust = 0 , vjust=.5 ), # changes position of X axis text
      axis.ticks =        element_line(colour = "grey50"),
      #   axis.title.y =      element_text(size = base_size,angle=90,vjust=.01,hjust=.1),
      legend.position = "none"
    )
}

# plotting
p1 <- ggplot(data=figure1,aes(x=endorsement.tr,y=est))+geom_point()+
  geom_pointrange(data=figure1,aes(x=figure1$endorsement.tr,ymin=figure1$CI_lw95,ymax=figure1$CI_up95),color="grey",size=0.6)+
  geom_pointrange(data=figure1,aes(x=figure1$endorsement.tr,ymin=figure1$CI_lw90,ymax=figure1$CI_up90),color="black",size=0.9)+
  scale_y_continuous(limits=c(0.1,0.55))+labs(x="Endorsement treatment",y="Endorsement Probability")


p1 <- p1+theme_bw1()
p1

# for those who don't have a lineage cadre
z2 <- z[which(z$lineage_member_influential_cadre=="否"),]
villager2 <- t.test(z2$endorse_itt[which(z2$treat=="村民")])
lineage2 <- t.test(z2$endorse_itt[which(z2$treat=="家族长老")])
cadre2 <- t.test(z2$endorse_itt[which(z2$treat=="村干部")])

villager_902 <- t.test(z2$endorse_itt[which(z2$treat=="村民")],conf.level = 0.90) 
lineage_902 <- t.test(z2$endorse_itt[which(z2$treat=="家族长老")],conf.level = 0.90)
cadre_902 <- t.test(z2$endorse_itt[which(z2$treat=="村干部")],conf.level = 0.90)

figure2 <- as.data.frame(matrix(rep(NA,18),nrow=3))
colnames(figure2)<- c("category","est","CI_up95","CI_lw95","CI_up90","CI_lw90")
row.names(figure2)<- c("Villager","LIneage Leader","Village Official")
figure2[,1]<-as.vector(1:3)
figure2[,2]<-as.vector(c(villager2$estimate,lineage2$estimate,cadre2$estimate))
figure2[,3]<-as.vector(c(villager2$conf.int[2],lineage2$conf.int[2],cadre2$conf.int[2]))
figure2[,4]<-as.vector(c(villager2$conf.int[1],lineage2$conf.int[1],cadre2$conf.int[1]))
figure2[,5]<-as.vector(c(villager_902$conf.int[2],lineage_902$conf.int[2],cadre_902$conf.int[2]))
figure2[,6]<-as.vector(c(villager_902$conf.int[1],lineage_902$conf.int[1],cadre_902$conf.int[1]))

figure2 <- cbind(endorsement.tr,figure2)
figure2$endorsement.tr <- as.factor(figure2$endorsement.tr)


# plotting
p2 <- ggplot(data=figure2,aes(x=endorsement.tr,y=est))+geom_point()+
  geom_pointrange(data=figure2,aes(x=figure2$endorsement.tr,ymin=figure2$CI_lw95,ymax=figure2$CI_up95),color="grey",size=0.6)+
  geom_pointrange(data=figure2,aes(x=figure2$endorsement.tr,ymin=figure2$CI_lw90,ymax=figure2$CI_up90),color="black",size=0.9)+
  scale_y_continuous(name = "",limits=c(0.1,0.55))+labs(x="Endorsement treatment", y ="Endorsement Probability")

p2 <- p2+theme_bw1()

p2

### ROBUST STANDARD ERROR PLOT ###
lm48 <- lm(seize~embedded+clan.any.mean+county.distance+terrain+soil.quality+election.township.mean+township.distance+surname.index+ethnic.index+log.hh+province+(log(income.mean))+I(qa08a+qa08b)++I((qa08a+qa08b)*embedded), data=village.both)
lm48_re_hc0 <- as.numeric(diag(vcovHC(lm48, type="HC0")))
lm48_re_hc1 <- as.numeric(diag(vcovHC(lm48, type="HC1"))) 
lm48_re_hc2 <- as.numeric(diag(vcovHC(lm48, type="HC2")))
lm48_re_hc3 <- as.numeric(diag(vcovHC(lm48, type="HC3")))
lm48_re_hc4 <- as.numeric(diag(vcovHC(lm48, type="HC4")))
lm48_re_hc5 <- as.numeric(diag(vcovHC(lm48, type="HC5")))
lm48_re_const <- as.numeric(diag(vcovHC(lm48, type="const")))
lm48_re_classic <- as.numeric(diag(vcov(lm48)))
lm48_clrse <- as.numeric(diag(cluster.vcov(lm48,cluster=province)))
rse <- cbind(lm48_re_const,lm48_re_hc0,lm48_re_hc1,lm48_re_hc2,lm48_re_hc3,lm48_re_hc4,lm48_re_hc5,lm48_re_classic,lm48_clrse)
vars <- c("intercept","embedded","clan.any.mean","county.distance","terrain","soil.quality","election.mean","township.dist","surname.indx","ethnic.indx","log.hh","province5","province6","province7","province8","province9",
          "province11","province12","province13","province14","province15","province16","province17","province18","province19",
          "province20","province21","province23","province24","province25","province26","province27","province28","log income","number enterprise","enterprise*embedded")

rse <- cbind(vars,rse)
rse <- as.data.frame(rse)

rse$lm48_re_const_num <- as.numeric(lm48_re_const)
rse$lm48_re_hc0_num <- as.numeric(lm48_re_hc0)
rse$lm48_re_hc1_num <- as.numeric(lm48_re_hc1)
rse$lm48_re_hc2_num <- as.numeric(lm48_re_hc2)
rse$lm48_re_hc3_num <- as.numeric(lm48_re_hc3)
rse$lm48_re_hc4_num <- as.numeric(lm48_re_hc4)
rse$lm48_re_hc5_num <- as.numeric(lm48_re_hc5)
rse$lm48_re_classic_num <- as.numeric(lm48_re_classic)
rse$lm48_clrse_num <- as.numeric(lm48_clrse)

rse_full <- rse
rse <- rse_full[1:11,]
rse <- rbind(rse, rse_full[34:36,])


rse.clean <- as.data.frame(cbind(as.numeric(rse$lm48_re_const_num),as.numeric(rse$lm48_re_hc0_num),as.numeric(rse$lm48_re_hc1_num),
                                 as.numeric(rse$lm48_re_hc2_num),as.numeric(rse$lm48_re_hc3_num),
                                 as.numeric(rse$lm48_re_hc4_num),as.numeric(rse$lm48_re_hc5_num),as.numeric(rse$lm48_re_classic_num),
                                 as.numeric(rse$lm48_clrse_num)))
rse.clean <- cbind(as.character(rse$vars), rse.clean)
colnames(rse.clean) <- c("Vars","Const","HC0","HC1","HC2","HC3","HC4","HC5","Classic","Clustered RSE")
rse.clean[,-1] <- sqrt(rse.clean[,-1])
rse.melt <- melt(rse.clean, id.vars = 'Vars', variable.name = 'Type', 
                 value.name = 'vals')

ggplot(rse.melt, aes(x = Vars, y = vals, shape = Type, color = Type)) +
  geom_point() +
  scale_shape_manual(values = c(0,1,2,3,4,5,6,7,8)) + 
  scale_color_manual(values = c("black","black","black","black","black","black","black","red","green")) +
  coord_flip() + labs(y="Standard Errors",x="Name of Covariates")

########################
## APPENDIX MATERIALS ##
########################

### MAHALANOBIS MATCHING ###
lm48 <- lm(seize~embedded+clan.any.mean+county.distance+terrain+soil.quality+election.township.mean+township.distance+surname.index+ethnic.index+log.hh+province+(log(income.mean))+I(qa08a+qa08b)++I((qa08a+qa08b)*embedded), data=village.both)

my.model.names <- c("seize","embedded","clan.any.mean","county.distance","terrain","soil.quality","election.township.mean","township.distance","surname.index","ethnic.index","log.hh","province","income.mean","qa08a","qa08b")
my.mod <- village.both[my.model.names]
my.mod <- na.omit(my.mod)
my.mod$province <- as.integer(my.mod$province) # without this line, province is not integer so cannot match
summary(my.mod)

# match
bal.formula <- formula(embedded ~ seize+ clan.any.mean+county.distance+terrain+soil.quality+election.township.mean+township.distance+surname.index+ethnic.index+log.hh+province+income.mean+qa08a+qa08b)
mb.unmatched <- MatchBalance(bal.formula,data=my.mod,print.level=0)
name <- colnames(my.mod[-2])
tab.unmatched <- baltest.collect(mb.unmatched,var.names = name,after = F)
print(xtable(tab.unmatched[,1:6]),comment=F,size="scriptsize") # the before match table

# pick covariates
covars <- c("clan.any.mean","county.distance","terrain","soil.quality","election.township.mean","township.distance","surname.index","ethnic.index","log.hh","province","income.mean","qa08a","qa08b")
m.first <- Match(Y=my.mod$seize,Tr=my.mod$embedded,X=my.mod[,covars],Weight=2)
summary(m.first)

# after match
mb.matched <- MatchBalance(bal.formula,data=my.mod,match.out=m.first,print.level=0)
tab.matched <- baltest.collect(mb.matched, var.names = name)
print(xtable(tab.matched[,1:6]),comment=F,size="scriptsize")
# Mahalanobis matching doesn't seem to help at all

### MATCH FRONTIER ###

ft.obj <- makeFrontier(data=my.mod,treatment="embedded", 
                       outcome="seize", match.on=covars)
plotFrontier(ft.obj, xlab = "Number of Observations Pruned",
             ylab = ft.obj$metric)
sim.frm <- formula(seize ~ embedded)
dep.frm <- formula(seize ~ embedded+clan.any.mean+county.distance+terrain+soil.quality+election.township.mean+township.distance+surname.index+ethnic.index+log.hh+province+income.mean+qa08a+qa08b)
set.seed(02138)
ee.front <- estimateEffects(ft.obj, formula=sim.frm, prop.estimated = .25,
                            mod.dependence.formula =dep.frm, continuous.vars = c("clan.any.mean"))
typeof(ft.obj)

estimateEffects(ft.obj, formula=sim.frm, prop.estimated = .25,
                mod.dependence.formula =dep.frm, continuous.vars = c("log.hh"),means.as.cutpoints=TRUE) 

coefs <- as.data.frame(ee.front$coefs)
coefs <- cbind(ee.front$Xs,coefs)
CIs <- as.data.frame(ee.front$CIs)
CIS <- t(CIs)
CIS <- as.data.frame(CIS)
colnames(CIS) <- c("low","up")
CIS <- cbind(ee.front$Xs, CIS)
ggplot()+geom_line(data=coefs,aes(x=coefs$`ee.front$Xs`,y=coefs$`ee.front$coefs`))+
  geom_ribbon(data=CIS, aes(x=CIS$`ee.front$Xs`,ymin=CIS$low,ymax=CIS$up, alpha=0.1))+
  labs(x="X's", y="Coefficients", title="Estimated effect")



################################
####### CODE BY FLORA LI #######
################################

#Load libraries
library(xtable)
library(devtools)


#################################
## MAIN TEXT MODELS WITH LOGIT ##
#################################

# Table 3
lm1 <- lm(seize~embedded+clan.any.max, data=village)
lm2 <- lm(seize~embedded+clan.any.max+county.distance+terrain+soil.quality+night.lights+province, data=village)
lm3 <- lm(seize~embedded+clan.any.max+county.distance+terrain+soil.quality+night.lights+election.township.mean+township.distance+province, data=village)
lm4 <- lm(seize~embedded+clan.any.max+county.distance+terrain+soil.quality+night.lights+election.township.mean+township.distance+surname.index+ethnic.index+log.hh+province, data=village)
stargazer(lm1,lm2,lm3,lm4,covariate.labels = c("Lineage leader is cadre", "Active lineage", "Distance to county seat",
                                               "Terrain roughness", "Agricultural suitability index", "Wealth (nighttime lights proxy)", 
                                               "Township control over elections", "Distance to township", "Surname fragmentation index",
                                               "Ethnic fragmentation index", "Number of households (log)"),
          omit = "province")

# Robust Standard Errors

lm1.se <- coef(summary(lm1))[, 2]
lm2.se <- coef(summary(lm2))[, 2]
lm3.se <- coef(summary(lm3))[, 2]
lm4.se <- coef(summary(lm4))[, 2]

lm1.rse <- sqrt(diag(vcovHC(lm1)))
lm2.rse <- sqrt(diag(vcovHC(lm2)))
lm3.rse <- sqrt(diag(vcovHC(lm3)))
lm4.rse <- sqrt(diag(vcovHC(lm4)))

lm1.crse <- sqrt(diag(cluster.vcov(lm1, village$province)))
lm2.crse <- sqrt(diag(cluster.vcov(lm2, village$province)))
lm3.crse <- sqrt(diag(cluster.vcov(lm3, village$province)))
lm4.crse <- sqrt(diag(cluster.vcov(lm4, village$province)))

stargazer(lm1, lm1, lm1, se = list(lm1.se, lm1.rse, lm1.crse), covariate.labels = c("Lineage leader is cadre", "Active lineage", "Distance to county seat",
                                                                                    "Terrain roughness", "Agricultural suitability index", "Wealth (nighttime lights proxy)", 
                                                                                    "Township control over elections", "Distance to township", "Surname fragmentation index",
                                                                                    "Ethnic fragmentation index", "Number of households (log)"),
          omit = "province")
stargazer(lm2, lm2, lm2, se = list(lm1.se, lm2.rse, lm2.crse), covariate.labels = c("Lineage leader is cadre", "Active lineage", "Distance to county seat",
                                                                                    "Terrain roughness", "Agricultural suitability index", "Wealth (nighttime lights proxy)", 
                                                                                    "Township control over elections", "Distance to township", "Surname fragmentation index",
                                                                                    "Ethnic fragmentation index", "Number of households (log)"),
          omit = "province")
stargazer(lm3, lm3, lm3, se = list(lm2.se, lm3.rse, lm3.crse), covariate.labels = c("Lineage leader is cadre", "Active lineage", "Distance to county seat",
                                                                                    "Terrain roughness", "Agricultural suitability index", "Wealth (nighttime lights proxy)", 
                                                                                    "Township control over elections", "Distance to township", "Surname fragmentation index",
                                                                                    "Ethnic fragmentation index", "Number of households (log)"),
          omit = "province")
stargazer(lm4, lm4, lm4, se = list(lm4.se, lm4.rse, lm4.crse), covariate.labels = c("Lineage leader is cadre", "Active lineage", "Distance to county seat",
                                                                                    "Terrain roughness", "Agricultural suitability index", "Wealth (nighttime lights proxy)", 
                                                                                    "Township control over elections", "Distance to township", "Surname fragmentation index",
                                                                                    "Ethnic fragmentation index", "Number of households (log)"),
          omit = "province")


# logit
lgs.glm1 <- glm(seize~embedded+clan.any.max, data=village,family=binomial(link=logit))
lgs.glm2 <- glm(seize~embedded+clan.any.max+county.distance+terrain+soil.quality+night.lights+province, data=village,family=binomial(link=logit))
lgs.glm3 <- glm(seize~embedded+clan.any.max+county.distance+terrain+soil.quality+night.lights+election.township.mean+township.distance+province, data=village,family=binomial(link=logit))
lgs.glm4 <- glm(seize~embedded+clan.any.max+county.distance+terrain+soil.quality+night.lights+election.township.mean+township.distance+surname.index+ethnic.index+log.hh+province, data=village,family=binomial(link=logit))
stargazer(lgs.glm1,lgs.glm2,lgs.glm3,lgs.glm4,covariate.labels = c("Lineage leader is cadre", "Active lineage", "Distance to county seat",
                                                                   "Terrain roughness", "Agricultural suitability index", "Wealth (nighttime lights proxy)", 
                                                                   "Township control over elections", "Distance to township", "Surname fragmentation index",
                                                                   "Ethnic fragmentation index", "Number of households (log)"),
          omit = "province")

glm1.se <- coef(summary(lgs.glm1))[,2]
glm2.se <- coef(summary(lgs.glm2))[,2]
glm3.se <- coef(summary(lgs.glm3))[,2]
glm4.se <- coef(summary(lgs.glm4))[,2]

glm1.rse <- sqrt(diag(vcovHC(lgs.glm1)))
glm2.rse <- sqrt(diag(vcovHC(lgs.glm2)))
glm3.rse <- sqrt(diag(vcovHC(lgs.glm3)))
glm4.rse <- sqrt(diag(vcovHC(lgs.glm4)))

glm1.crse <- sqrt(diag(cluster.vcov(lgs.glm1, village$province)))
glm2.crse <- sqrt(diag(cluster.vcov(lgs.glm2, village$province)))
glm3.crse <- sqrt(diag(cluster.vcov(lgs.glm3, village$province)))
glm4.crse <- sqrt(diag(cluster.vcov(lgs.glm4, village$province)))

stargazer(lgs.glm1, lgs.glm1, se = list(glm1.rse, glm1.crse), covariate.labels = c("Lineage leader is cadre", "Active lineage", "Distance to county seat",
                                                                                   "Terrain roughness", "Agricultural suitability index", "Wealth (nighttime lights proxy)", 
                                                                                   "Township control over elections", "Distance to township", "Surname fragmentation index",
                                                                                   "Ethnic fragmentation index", "Number of households (log)"),
          omit = "province")
stargazer(lgs.glm2, lgs.glm2, se = list(glm2.rse, glm2.crse), covariate.labels = c("Lineage leader is cadre", "Active lineage", "Distance to county seat",
                                                                                   "Terrain roughness", "Agricultural suitability index", "Wealth (nighttime lights proxy)", 
                                                                                   "Township control over elections", "Distance to township", "Surname fragmentation index",
                                                                                   "Ethnic fragmentation index", "Number of households (log)"),
          omit = "province")
stargazer(lgs.glm3, lgs.glm3, se = list(glm3.rse, glm3.crse), covariate.labels = c("Lineage leader is cadre", "Active lineage", "Distance to county seat",
                                                                                   "Terrain roughness", "Agricultural suitability index", "Wealth (nighttime lights proxy)", 
                                                                                   "Township control over elections", "Distance to township", "Surname fragmentation index",
                                                                                   "Ethnic fragmentation index", "Number of households (log)"),
          omit = "province")
stargazer(lgs.glm4, lgs.glm4, lgs.glm4, se = list(glm4.se, glm4.rse, glm4.crse), covariate.labels = c("Lineage leader is cadre", "Active lineage", "Distance to county seat",
                                                                                                      "Terrain roughness", "Agricultural suitability index", "Wealth (nighttime lights proxy)", 
                                                                                                      "Township control over elections", "Distance to township", "Surname fragmentation index",
                                                                                                      "Ethnic fragmentation index", "Number of households (log)"),
          omit = "province")


# Table 4
## lm
lm1 <- lm(seize~embedded.formal+clan.formal, data=village)
lm2 <- lm(seize~embedded.formal+clan.formal+county.distance+terrain+soil.quality+night.lights+province, data=village)
lm3 <- lm(seize~embedded.formal+clan.formal+county.distance+terrain+soil.quality+night.lights+election.township.mean+township.distance+province, data=village)
lm4 <- lm(seize~embedded.formal+clan.formal+county.distance+terrain+soil.quality+night.lights+election.township.mean+township.distance+surname.index+ethnic.index+log.hh+province, data=village)
stargazer(lm1, lm2, lm3, lm4)

lm1.se <- coef(summary(lm1))[, 2]
lm2.se <- coef(summary(lm2))[, 2]
lm3.se <- coef(summary(lm3))[, 2]
lm4.se <- coef(summary(lm4))[, 2]

lm1.rse <- sqrt(diag(vcovHC(lm1)))
lm2.rse <- sqrt(diag(vcovHC(lm2)))
lm3.rse <- sqrt(diag(vcovHC(lm3)))
lm4.rse <- sqrt(diag(vcovHC(lm4)))

lm1.crse <- sqrt(diag(cluster.vcov(lm1, village$province)))
lm2.crse <- sqrt(diag(cluster.vcov(lm2, village$province)))
lm3.crse <- sqrt(diag(cluster.vcov(lm3, village$province)))
lm4.crse <- sqrt(diag(cluster.vcov(lm4, village$province)))

stargazer(lm1, lm1, lm1, se = list(lm1.se, lm1.rse, lm1.crse))
stargazer(lm2, lm2, lm2, se = list(lm2.se, lm2.rse, lm2.crse))
stargazer(lm3, lm3, lm3, se = list(lm3.se, lm3.rse, lm3.crse))
stargazer(lm4, lm4, lm4, se = list(lm4.se, lm4.rse, lm4.crse))

## logit with glm
lgs.lm1 <- glm(seize~embedded.formal+clan.formal, data=village,family=binomial(link=logit))
lgs.lm2 <- glm(seize~embedded.formal+clan.formal+county.distance+terrain+soil.quality+night.lights+province, data=village,family=binomial(link=logit))
lgs.lm3 <- glm(seize~embedded.formal+clan.formal+county.distance+terrain+soil.quality+night.lights+election.township.mean+township.distance+province, data=village,family=binomial(link=logit))
lgs.lm4 <- glm(seize~embedded.formal+clan.formal+county.distance+terrain+soil.quality+night.lights+election.township.mean+township.distance+surname.index+ethnic.index+log.hh+province, data=village,family=binomial(link=logit))
stargazer(lgs.lm1, lgs.lm2, lgs.lm3, lgs.lm4)

glm1.rse <- sqrt(diag(vcovHC(lgs.lm1)))
glm2.rse <- sqrt(diag(vcovHC(lgs.lm2)))
glm3.rse <- sqrt(diag(vcovHC(lgs.lm3)))
glm4.rse <- sqrt(diag(vcovHC(lgs.lm4)))

glm1.crse <- sqrt(diag(cluster.vcov(lgs.lm1, village$province)))
glm2.crse <- sqrt(diag(cluster.vcov(lgs.lm2, village$province)))
glm3.crse <- sqrt(diag(cluster.vcov(lgs.lm3, village$province)))
glm4.crse <- sqrt(diag(cluster.vcov(lgs.lm4, village$province)))

stargazer(lgs.lm1, lgs.lm1, se = list(glm1.rse, glm1.crse))
stargazer(lgs.lm2, lgs.lm2, se = list(glm2.rse, glm2.crse))
stargazer(lgs.lm3, lgs.lm3, se = list(glm3.rse, glm3.crse))
stargazer(lgs.lm4, lgs.lm4, se = list(glm4.rse, glm4.crse))

# Table 5
lm1 <- lm(seize.placebo~embedded+clan.any.max, data=village)
lm2 <- lm(seize.placebo~embedded+clan.any.max+county.distance+terrain+soil.quality+night.lights+province, data=village)
lm3 <- lm(seize.placebo~embedded+clan.any.max+county.distance+terrain+soil.quality+night.lights+election.township.mean+township.distance+province, data=village)
lm4 <- lm(seize.placebo~embedded+clan.any.max+county.distance+terrain+soil.quality+night.lights+election.township.mean+township.distance+surname.index+ethnic.index+log.hh+province, data=village)
stargazer(lm1, lm2, lm3, lm4)

lm1.se <- coef(summary(lm1))[, 2]
lm2.se <- coef(summary(lm2))[, 2]
lm3.se <- coef(summary(lm3))[, 2]
lm4.se <- coef(summary(lm4))[, 2]

lm1.rse <- sqrt(diag(vcovHC(lm1)))
lm2.rse <- sqrt(diag(vcovHC(lm2)))
lm3.rse <- sqrt(diag(vcovHC(lm3)))
lm4.rse <- sqrt(diag(vcovHC(lm4)))

lm1.crse <- sqrt(diag(cluster.vcov(lm1, village$province)))
lm2.crse <- sqrt(diag(cluster.vcov(lm2, village$province)))
lm3.crse <- sqrt(diag(cluster.vcov(lm3, village$province)))
lm4.crse <- sqrt(diag(cluster.vcov(lm4, village$province)))

stargazer(lm1, lm1, lm1, se = list(lm1.se, lm1.rse, lm1.crse))
stargazer(lm2, lm2, lm2, se = list(lm2.se, lm2.rse, lm2.crse))
stargazer(lm3, lm3, lm3, se = list(lm3.se, lm3.rse, lm3.crse))
stargazer(lm4, lm4, lm4, se = list(lm4.se, lm4.rse, lm4.crse))


# logit
lgt.lm1 <- glm(seize.placebo~embedded+clan.any.max, data=village,family=binomial(link=logit))
lgt.lm2 <- glm(seize.placebo~embedded+clan.any.max+county.distance+terrain+soil.quality+night.lights+province, data=village,family=binomial(link=logit))
lgt.lm3 <- glm(seize.placebo~embedded+clan.any.max+county.distance+terrain+soil.quality+night.lights+election.township.mean+township.distance+province, data=village,family=binomial(link=logit))
lgt.lm4 <- glm(seize.placebo~embedded+clan.any.max+county.distance+terrain+soil.quality+night.lights+election.township.mean+township.distance+surname.index+ethnic.index+log.hh+province, data=village,family=binomial(link=logit))
stargazer(lgt.lm1, lgt.lm2, lgt.lm3, lgt.lm4)

glm1.rse <- sqrt(diag(vcovHC(lgt.lm1)))
glm2.rse <- sqrt(diag(vcovHC(lgt.lm2)))
glm3.rse <- sqrt(diag(vcovHC(lgt.lm3)))
glm4.rse <- sqrt(diag(vcovHC(lgt.lm4)))

glm1.crse <- sqrt(diag(cluster.vcov(lgt.lm1, village$province)))
glm2.crse <- sqrt(diag(cluster.vcov(lgt.lm2, village$province)))
glm3.crse <- sqrt(diag(cluster.vcov(lgt.lm3, village$province)))
glm4.crse <- sqrt(diag(cluster.vcov(lgt.lm4, village$province)))

stargazer(lgt.lm1, lgt.lm1, se = list(glm1.rse, glm1.crse))
stargazer(lgt.lm2, lgt.lm2, se = list(glm2.rse, glm2.crse))
stargazer(lgt.lm3, lgt.lm3, se = list(glm3.rse, glm3.crse))
stargazer(lgt.lm4, lgt.lm4, se = list(glm4.rse, glm4.crse))

# Table 6
lm1 <- lm(petition.max~embedded+seize+embedded*seize, data=village)
lm2 <- lm(petition.max~embedded+seize+embedded*seize+terrain+soil.quality+night.lights+province, data=village)
lm3 <- lm(petition.max~embedded+seize+embedded*seize+county.distance+terrain+soil.quality+night.lights+election.township.mean+township.distance+province, data=village)
lm4 <- lm(petition.max~embedded+seize+embedded*seize+county.distance+terrain+soil.quality+night.lights+election.township.mean+township.distance+surname.index+ethnic.index+log.hh+province, data=village)
stargazer(lm1, lm2, lm3, lm4)

lm1.se <- coef(summary(lm1))[, 2]
lm2.se <- coef(summary(lm2))[, 2]
lm3.se <- coef(summary(lm3))[, 2]
lm4.se <- coef(summary(lm4))[, 2]

lm1.rse <- sqrt(diag(vcovHC(lm1)))
lm2.rse <- sqrt(diag(vcovHC(lm2)))
lm3.rse <- sqrt(diag(vcovHC(lm3)))
lm4.rse <- sqrt(diag(vcovHC(lm4)))

lm1.crse <- sqrt(diag(cluster.vcov(lm1, village$province)))
lm2.crse <- sqrt(diag(cluster.vcov(lm2, village$province)))
lm3.crse <- sqrt(diag(cluster.vcov(lm3, village$province)))
lm4.crse <- sqrt(diag(cluster.vcov(lm4, village$province)))

stargazer(lm1, lm1, lm1, se = list(lm1.se, lm1.rse, lm1.crse))
stargazer(lm2, lm2, lm2, se = list(lm2.se, lm2.rse, lm2.crse))
stargazer(lm3, lm3, lm3, se = list(lm3.se, lm3.rse, lm3.crse))
stargazer(lm4, lm4, lm4, se = list(lm4.se, lm4.rse, lm4.crse))

# logit
lgt.lm1 <- glm(petition.max~embedded+seize+embedded*seize, data=village,family=binomial(link=logit))
lgt.lm2 <- glm(petition.max~embedded+seize+embedded*seize+terrain+soil.quality+night.lights+province, data=village,family=binomial(link=logit))
lgt.lm3 <- glm(petition.max~embedded+seize+embedded*seize+county.distance+terrain+soil.quality+night.lights+election.township.mean+township.distance+province, data=village,family=binomial(link=logit))
lgt.lm4 <- glm(petition.max~embedded+seize+embedded*seize+county.distance+terrain+soil.quality+night.lights+election.township.mean+township.distance+surname.index+ethnic.index+log.hh+province, data=village,family=binomial(link=logit))
stargazer(lgt.lm1, lgt.lm2, lgt.lm3, lgt.lm4)

glm1.rse <- sqrt(diag(vcovHC(lgt.lm1)))
glm2.rse <- sqrt(diag(vcovHC(lgt.lm2)))
glm3.rse <- sqrt(diag(vcovHC(lgt.lm3)))
glm4.rse <- sqrt(diag(vcovHC(lgt.lm4)))

glm1.crse <- sqrt(diag(cluster.vcov(lgt.lm1, village$province)))
glm2.crse <- sqrt(diag(cluster.vcov(lgt.lm2, village$province)))
glm3.crse <- sqrt(diag(cluster.vcov(lgt.lm3, village$province)))
glm4.crse <- sqrt(diag(cluster.vcov(lgt.lm4, village$province)))

stargazer(lgt.lm1, lgt.lm1, se = list(glm1.rse, glm1.crse))
stargazer(lgt.lm2, lgt.lm2, se = list(glm2.rse, glm2.crse))
stargazer(lgt.lm3, lgt.lm3, se = list(glm3.rse, glm3.crse))
stargazer(lgt.lm4, lgt.lm4, se = list(glm4.rse, glm4.crse))


##########################################
## REPLICATION OF FIGURE 1 IN MAIN TEXT###
##########################################
### Loading data and packages
load("endorsement_experiment_replication.RData")
#View(z)
library(ggplot2)


### Prep for plotting
# the variable endorse means the villager endorse(1) or not (0)
# the variable treat means which treatment does the villager see (the assumed figure who endorsed the plan and spread the news)
# confidence interval of t test
# By default: 95%, the grey CI in figure 1
villager <- t.test(z$endorse_itt[which(z$treat=="村民")])
lineage <- t.test(z$endorse_itt[which(z$treat=="家族长老")])
cadre <- t.test(z$endorse_itt[which(z$treat=="村干部")])
# 90% CI, the black CI in figure 1
villager_90 <- t.test(z$endorse_itt[which(z$treat=="村民")],conf.level = 0.90) 
lineage_90 <- t.test(z$endorse_itt[which(z$treat=="家族长老")],conf.level = 0.90)
cadre_90 <- t.test(z$endorse_itt[which(z$treat=="村干部")],conf.level = 0.90)

###Prep for plotting
# create a dataframe which contains the CI's and point estimates
#View(villager)
figure1 <- as.data.frame(matrix(rep(NA,18),nrow=3))
colnames(figure1)<- c("category","est","CI_up95","CI_lw95","CI_up90","CI_lw90")
row.names(figure1)<- c("Villager","LIneage Leader","Village Official")
figure1[,1]<-factor(c("Villager","LIneage Leader","Village Official"), levels = c("Villager","LIneage Leader","Village Official"))
figure1[,2]<-as.vector(c(villager$estimate,lineage$estimate,cadre$estimate))
figure1[,3]<-as.vector(c(villager$conf.int[2],lineage$conf.int[2],cadre$conf.int[2]))
figure1[,4]<-as.vector(c(villager$conf.int[1],lineage$conf.int[1],cadre$conf.int[1]))
figure1[,5]<-as.vector(c(villager_90$conf.int[2],lineage_90$conf.int[2],cadre_90$conf.int[2]))
figure1[,6]<-as.vector(c(villager_90$conf.int[1],lineage_90$conf.int[1],cadre_90$conf.int[1]))
#View(figure1)

### plotting
ggplot(data=figure1,aes(x=category,y=est))+geom_point()+
  geom_pointrange(data=figure1,aes(x=figure1$category,ymin=figure1$CI_lw95,ymax=figure1$CI_up95),color="grey",size=0.6)+
  geom_pointrange(data=figure1,aes(x=figure1$category,ymin=figure1$CI_lw90,ymax=figure1$CI_up90),color="black",size=0.9)+
  scale_y_continuous(name = "",limits=c(0.1,0.55)) + ggtitle("Survey Experiment")

#########################
## EXTENSION: MATCHING ##
#########################

village <- na.omit(village)
village$province <- as.integer(village$province)
bal.formula <- as.formula(embedded ~ + clan.any.max + county.distance + terrain + soil.quality + night.lights + election.township.mean + township.distance + surname.index + ethnic.index + log.hh + province)
mb.unmatched <- MatchBalance(bal.formula, data = village, print.level = 0)
covars <- c("clan.any.max", "county.distance", "terrain", "soil.quality", "night.lights", "election.township.mean", "township.distance", "surname.index", "ethnic.index", "log.hh", "province")
tab.unmatched <- baltest.collect(mb.unmatched, var.names = covars, after = F)
print(xtable(tab.unmatched[,1:6]), comment = F, size = "scriptsize")

m.first <- Match(Y = village$seize, Tr = village$embedded, X = village[,covars], Weight = 2)
summary(m.first)
mb.matched <- MatchBalance(bal.formula, data = village, print.level = 0)
tab.matched <- baltest.collect(mb.matched, var.names = covars, after = F)
print(xtable(tab.matched[,1:6]), comment = F, size = "scriptsize")

frontier <- makeFrontier(dataset = village, treatment = "embedded", outcome = "seize", match.on = covars)
plotFrontier(frontier)
my.formula <- as.formula(seize~embedded+clan.any.max+county.distance+terrain+soil.quality+night.lights+election.township.mean+township.distance+surname.index+ethnic.index+log.hh+province)
estimates <- estimateEffects(frontier, formula = "seize ~ embedded", mod.dependence.formula = my.formula, continuous.vars = c("county.distance", "terrain", "soil.quality", "night.lights", "election.township.mean", "township.distance", "surname.index", "ethnic.index", "log.hh", "province"), prop.estimated = .25, seed = 02138, means.as.cutpoints = TRUE)
plotEstimates(estimates)
frontier.dataset <- generateDataset(frontier, N = 175)
model.pruned <- lm(seize~embedded+clan.any.max+county.distance+terrain+soil.quality+night.lights+election.township.mean+township.distance+surname.index+ethnic.index+log.hh+province, data=village)
stargazer(model.pruned, covariate.labels = c("Lineage leader is cadre", "Active lineage", "Distance to county seat",
                                             "Terrain roughness", "Agricultural suitability index", "Wealth (nighttime lights proxy)", 
                                             "Township control over elections", "Distance to township", "Surname fragmentation index",
                                             "Ethnic fragmentation index", "Number of households (log)"),
          omit = "province")